#!/usr/bin/python
# -*- coding: UTF-8 -*-

from __future__ import with_statement

# Revision stamp of this module. NOTE(review): the "$Id: ... $" form looks like
# a version-control keyword expansion (file name, revision, date, author) --
# confirm before editing it by hand.
__version__ = "$Id: pdb_reverter.py 54 2010-05-22 16:58:55Z sedlakf $"

import os.path
import re
import sys

def pdb_revert(filename, repl_agct=True,
                         repl_o1p=True,
                         repl_apostrophe=True,
                         repl_c7=True):
    """Rewrite the "ATOM" lines of a remediated PDB file in the older style.

    Opens `filename` (remediated PDB file) and performs some basic changes
    in its "ATOM" lines in order to be successfully read by HBPLUS (so that
    the output file resembles the non-remediated PDB). The result is written
    to a new file whose name is the original name with ".pdb" appended (so
    that 8BXDA.pdb becomes 8BXDA.pdb.pdb); the input file is left untouched.

    Lines that do not start with "ATOM" are copied unchanged.

    These updates are done, in this order (each of them can be turned off by
    the corresponding argument):

    * repl_agct: residues of DNA named DX (X is one of A, G, C, T, U) are
      transformed to " X" -- the D is replaced by a space, so the line keeps
      its length.
    * repl_apostrophe: ' is replaced by * (like in C1' -> C1*).
    * repl_c7: "C7 " (methyl's C in thymine) is mapped to "C5M".
    * repl_o1p: atoms in the form OP{number} are translated to O{number}P
      ({number} is always only one digit).

    Every substitution is applied to the whole text of the line, not only to
    the atom-name or residue-name columns.

    Returns None. I/O errors (IOError) raised while reading or writing are
    not caught here; treat them at a higher level.
    """

    # DA  ->  A
    agct = re.compile(r"D([AGCTU])")
    # OP1 -> O1P (open question from the original author: should really all
    # occurrences be swapped?)
    o1p = re.compile(r"(O)(P)([0-9])")

    # Context manager guarantees the input handle is closed even when
    # reading fails part-way.
    with open(filename) as infile:
        lines = infile.readlines()

    for i, line in enumerate(lines):
        if not line.startswith("ATOM"):
            continue
        # Replace the remediated atom and residue names.
        if repl_agct:
            line = agct.sub(r" \1", line)
        if repl_apostrophe:
            line = line.replace("'", "*")
        if repl_c7:
            line = line.replace("C7 ", "C5M")
        if repl_o1p:
            line = o1p.sub(r"\1\3\2", line)
        lines[i] = line

    with open(filename + ".pdb", "w") as outfile:
        outfile.writelines(lines)


# Script entry point: expects the path of the PDB file to revert as the first
# command-line argument; without it, prints the usage text and exits with
# status 1.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # The usage text carries a %s placeholder for the program name, so it
        # has to be formatted with sys.argv[0]; otherwise a literal "%s" is
        # printed. A single parenthesised expression after print is valid
        # both as a Python 2 print statement and as a Python 3 function call.
        print("""Usage: %s file_to_revert

    PDB reverter opens a file (remediated PDB file) and performs some basic
    changes in its "ATOM" lines in order to be succesfully read by HBPLUS 
    (so that the output file resembles the non-remediated PDB). Then, the
    file is saved with .pdb suffix added to the original name (9ZZZ.pdb
    becomes 9ZZZ.pdb.pdb).
    
    It performs the following updates:
    * Residues of DNA named DX are transformed to X
    * Atoms in the form OP{number} are translated to O{number}P 
      ({number} is always only one digit)
    * ' is replaced by * (like in C1' -> C1*)
    * C7 (methyl's C in thymine) is translated to C5M""" % sys.argv[0])
        sys.exit(1)

    pdb_revert(sys.argv[1])
